#Part 1 The purpose of this notebook is to determine the selective benefit of glucosinolate and flavonoid compounds through plant competition by creating selection gradients. Selection gradients will involve final body mass as the proxy for fitness, but this will be replaced with fitness once the measurement is in. Concentration will be on the x-axis. This analysis will account for family and greenhouse location.

#Part 2 The second purpose is to determine if glucosinolates and flavonoids influence suscpetibility to pathogens and if this susceptibility results in increased fitness

#Read in and prep data

library(ggplot2)
library(lme4)
Loading required package: Matrix
library(tidyr)

Attaching package: ‘tidyr’

The following objects are masked from ‘package:Matrix’:

    expand, pack, unpack
library(cowplot)

********************************************************
Note: As of version 1.0.0, cowplot does not change the
  default ggplot2 theme anymore. To recover the previous
  behavior, execute:
  theme_set(theme_cowplot())
********************************************************
library(grid)
library(gridExtra)

Attaching package: ‘gridExtra’

The following object is masked from ‘package:dplyr’:

    combine
library(dplyr)
source("GGPlot_Themes.R")

#read in data
rm(list=ls())
dat<-read.csv("DataSynthesis.csv")

#Remove maple controls data from the data set.
dat<-dat %>% filter(treatment!="mcnt") %>% droplevels()

#Assign family column
prefamily<-gsub("*.\\|","",dat$Tag)
dat$Family<-gsub("\\-.*","",prefamily)

#remove those with fertilizer treatment, and extra genotypes that are only in the alone treatment. 
dat<-dat[!grepl("i",dat$Sample,fixed=T),]

#Initializing columns to avoid constant error messages. 
dat$WhiteFungLogis<-NA
dat$BlackFungLogis<-NA

#Generating data frames with summarized leaf data (dat2) and summarized genotype data (dat3)

#Summarizing: Taking the mean value of leaves. This tibble contains data at the level of the plant means. 
dat2<-dat %>% group_by(Tag) %>% summarize(ChlorA=mean(ChlorA),ChlorB=mean(ChlorB),gluc_Conc=mean(gluc_Conc),flav_Conc=mean(flav_Conc),Family=first(Family),treatment=first(treatment),gh_row=first(gh_row),gh_bench=first(gh_bench),GM_TotalLeaf_Area=first(GM_TotalLeaf_Area),comp_number=first(comp_number),ThripsDam=mean(ThripsDam),WhiteFungDam=mean(WhiteFungDam),BlackPathDam=mean(BlackPathDam),Fern=mean(Fern),gh_col=first(gh_col))
dat

#All of these genotypes died (15 Genotypes).(We are simply missing a final measurement for e|JBCHY1-1-50|Q|240) That is only 3% Mortality. 
dead<-dat2[is.na(dat2$GM_TotalLeaf_Area),]

#Removing those with dead competitors from the garlic mustard treatment. ("e|JBCHY1-1-50|Q|240") did not die, we are simply missing the final measurement for it.

dead_competitors<-dead %>% filter(treatment=="gm",Tag!="e|JBCHY1-1-50|Q|240") %>% select(comp_number)

#Removing those with dead competitors from the analysis. 
dat2<-dat2 %>% filter(!comp_number %in% dead_competitors$comp_number)


##Summarizing: Taking the mean value of plants. This tibble contains data at the level of the family means within each treatment. 
dat3<-dat2 %>% drop_na(GM_TotalLeaf_Area) %>% group_by(Family,treatment)  %>% summarize_if(is.numeric,mean)

#Searching for a fitness trade-off between the alone and interspecific competition treatment.

source("GGPlot_Themes.R")

#Calculating family means within treatment. 
TradeOff<-dat3 %>% filter(treatment=="a") %>% drop_na(GM_TotalLeaf_Area) %>%  select(LeafSizeAlone=GM_TotalLeaf_Area,Family,gluc_ConcAlone=gluc_Conc) %>% right_join(dat3 %>% filter(treatment=="m") %>%  select(LeafSizeMaple=GM_TotalLeaf_Area,Family,gluc_ConcMaple=gluc_Conc),by="Family")

#Calculating standard error of each family
StdErr<-dat2 %>% select(Family,treatment,GM_TotalLeaf_Area) %>% group_by(Family,treatment) %>% drop_na(GM_TotalLeaf_Area) %>%  summarize(StdErr=sd(GM_TotalLeaf_Area)/sqrt(length(GM_TotalLeaf_Area)),size=length(GM_TotalLeaf_Area))

#Shifting the data to be in long form. 
StdErr2<-StdErr %>% filter(treatment=="a") %>% select(StdErrAlone=StdErr,Family) %>% right_join(StdErr %>% filter(treatment=="m") %>% select(StdErrMaple=StdErr,Family),by="Family")

TradeOff2<-StdErr2 %>% left_join(TradeOff)
Joining, by = "Family"
#tiff("Selection_Figures/TradeOff.tiff", units="in", width=10, height=6, res=300)

ggplot(TradeOff2,aes(y=LeafSizeAlone,x=LeafSizeMaple))+
  geom_point()+
  geom_linerange(aes(ymin = LeafSizeAlone - StdErrAlone, 
                    ymax = LeafSizeAlone + StdErrAlone))+
  geom_errorbarh(aes(xmin = LeafSizeMaple - StdErrMaple,
                    xmax = LeafSizeMaple + StdErrMaple))+
theme_simple()+
  xlab("Performance with Maple")+
  ylab("Performance Alone")

#dev.off()
summary(lm(LeafSizeAlone~LeafSizeMaple,data=TradeOff2))

Call:
lm(formula = LeafSizeAlone ~ LeafSizeMaple, data = TradeOff2)

Residuals:
    Min      1Q  Median      3Q     Max 
-2019.5  -657.0  -265.5   907.7  2364.0 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   9590.03571 1616.26626   5.933 6.87e-06 ***
LeafSizeMaple   -0.00936    0.18258  -0.051     0.96    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1244 on 21 degrees of freedom
Multiple R-squared:  0.0001251, Adjusted R-squared:  -0.04749 
F-statistic: 0.002628 on 1 and 21 DF,  p-value: 0.9596

#Visualizing genetic variation and greenhouse variation, which will be controlled for.

#GH Bench
ggplot(dat2)+
  geom_point(aes(y=gluc_Conc,x=gh_bench,colour=as.factor(gh_bench)))


#GH Col
ggplot(dat2)+
  geom_point(aes(y=gluc_Conc,x=gh_col,colour=as.factor(gh_bench)))


#Investigating genetic differences by treatment
#gluc_Conc
boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="a",])

boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="m",])

boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="gm",])

#bodymass
boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="a",])

boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="m",])

boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="gm",])

#Modelling: What influences performance (total leaf area)

#With limited dataset containing flavonoid data
summary(lmer(GM_TotalLeaf_Area ~ treatment*gluc_Conc+Fern+BlackPathDam +flav_Conc + (1 | gh_bench/gh_col) ,data=dat2))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: GM_TotalLeaf_Area ~ treatment * gluc_Conc + Fern + BlackPathDam +  
    flav_Conc + (1 | gh_bench/gh_col)
   Data: dat2

REML criterion at convergence: 6388.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2860 -0.5799  0.0639  0.5879  2.8024 

Random effects:
 Groups          Name        Variance Std.Dev.
 gh_col:gh_bench (Intercept)  588144   766.9  
 gh_bench        (Intercept) 1927899  1388.5  
 Residual                    7116930  2667.8  
Number of obs: 349, groups:  gh_col:gh_bench, 28; gh_bench, 5

Fixed effects:
                      Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)           11705.91    2415.48   231.10   4.846 2.31e-06 ***
treatmentgm           -8139.74    2860.71   322.70  -2.845 0.004720 ** 
treatmentm            -8725.71    3156.29   328.21  -2.765 0.006022 ** 
gluc_Conc             -4183.45    2377.16   322.41  -1.760 0.079381 .  
Fern                   -122.68      56.51   330.47  -2.171 0.030631 *  
BlackPathDam           -162.81      41.42   328.48  -3.931 0.000103 ***
flav_Conc              2524.64    1213.99   338.49   2.080 0.038312 *  
treatmentgm:gluc_Conc  4230.28    2889.49   321.40   1.464 0.144165    
treatmentm:gluc_Conc   7832.59    3279.65   328.95   2.388 0.017493 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg trtmntm glc_Cn Fern   BlckPD flv_Cn trtmntg:_C
treatmentgm -0.767                                                       
treatmentm  -0.697  0.588                                                
gluc_Conc   -0.856  0.736   0.675                                        
Fern        -0.001 -0.009  -0.055  -0.032                                
BlackPathDm -0.019  0.093   0.046  -0.053 -0.078                         
flav_Conc   -0.179  0.084   0.071  -0.269  0.049  0.102                  
trtmntgm:_C  0.751 -0.991  -0.577  -0.737  0.002 -0.094 -0.063           
trtmntm:g_C  0.669 -0.564  -0.993  -0.655  0.047 -0.042 -0.069  0.559    

#Model Diagnostics.

#Visualization – The conditional benefit of glucosinolates (allelopathy)


aloneSlope<-function(x){
  y=-3966.60*x+ 13751.96
  return(y)
}
mapleSlope<-function(x){
  y=(-4254.2+8417.95)*x+13751.96 -9243.71
  return(y)
}

mustardSlope<-function(x){
  y=+13751.96-8376.67#Non significant slope
  return(y)
}

minM<-min(dat2$gluc_Conc[dat2$treatment=="m"],na.rm = T)
maxM<-max(dat2$gluc_Conc[dat2$treatment=="m"],na.rm = T)

minA<-min(dat2$gluc_Conc[dat2$treatment=="a"],na.rm = T)
maxA<-max(dat2$gluc_Conc[dat2$treatment=="a"],na.rm = T)

minG<-min(dat2$gluc_Conc[dat2$treatment=="gm"],na.rm = T)
maxG<-max(dat2$gluc_Conc[dat2$treatment=="gm"],na.rm = T)

#tiff("Selection_Figures/Gluc_Benefit.tiff", units="in", width=10, height=6, res=300)
library(ggplot2)
ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=gluc_Conc,colour=treatment),size=2)+
  geom_segment(x=minA,xend=maxA,y=aloneSlope(minA),yend=aloneSlope(maxA),colour="#009E73",size=1.5)+
    geom_segment(x=minM,xend=maxM,y=mapleSlope(minM),yend=mapleSlope(maxM),colour="#E69F00",size=1.5)+
  geom_segment(x=minG,xend=maxG,y=mustardSlope(minG),yend=mustardSlope(maxG),colour="#56B4E9",size=1.5)+theme_simple()+
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")")))+xlab(bquote(bold("[Total Glucosinolate] " (mg/ml))))+
  scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))

#dev.off()

minA
[1] 0.7713421

#Visualizing the effect of flavonoid on fitness.

summary(lmer(GM_TotalLeaf_Area~treatment+flav_Conc+BlackPathDam+Fern+(1|Family)+(1|gh_bench/gh_col), data=dat2))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: GM_TotalLeaf_Area ~ treatment + flav_Conc + BlackPathDam + Fern +  
    (1 | Family) + (1 | gh_bench/gh_col)
   Data: dat2

REML criterion at convergence: 6473.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.1990 -0.6278  0.0560  0.6004  2.6495 

Random effects:
 Groups          Name        Variance Std.Dev.
 gh_col:gh_bench (Intercept)  660632   812.8  
 Family          (Intercept)  210327   458.6  
 gh_bench        (Intercept) 1767108  1329.3  
 Residual                    7165525  2676.8  
Number of obs: 350, groups:  gh_col:gh_bench, 28; Family, 23; gh_bench, 5

Fixed effects:
             Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)   7682.44    1156.80    33.30   6.641 1.42e-07 ***
treatmentgm  -3897.18     385.80   333.76 -10.102  < 2e-16 ***
treatmentm   -1133.35     364.02   330.79  -3.113  0.00201 ** 
flav_Conc     2471.19    1040.31   341.75   2.375  0.01808 *  
BlackPathDam   -77.69      30.19   335.79  -2.573  0.01050 *  
Fern          -125.75      57.56   330.33  -2.185  0.02962 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg trtmntm flv_Cn BlckPD
treatmentgm -0.294                              
treatmentm  -0.260  0.475                       
flav_Conc   -0.795  0.198   0.137               
BlackPathDm -0.163 -0.043   0.021   0.090       
Fern        -0.035 -0.037  -0.073   0.030 -0.156
aloneSlope<-function(x){
  y=2024.62 *x+  8035.30
  return(y)
}
mapleSlope<-function(x){
  y=(2024.62 )*x+ 8035.30 -1067.02
  return(y)
}

mustardSlope<-function(x){
  y=2024.62 *x+ 8035.30-3746.91#Non significant slope
  return(y)
}

minM<-min(dat2$flav_Conc[dat2$treatment=="m"],na.rm = T)
maxM<-max(dat2$flav_Conc[dat2$treatment=="m"],na.rm = T)

minA<-min(dat2$flav_Conc[dat2$treatment=="a"],na.rm = T)
maxA<-max(dat2$flav_Conc[dat2$treatment=="a"],na.rm = T)

minG<-min(dat2$flav_Conc[dat2$treatment=="gm"],na.rm = T)
maxG<-max(dat2$flav_Conc[dat2$treatment=="gm"],na.rm = T)


#tiff("Selection_Figures/Flav_Benefit.tiff", units="in", width=10, height=6, res=300)
ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=flav_Conc,colour=treatment),size=2)+
  geom_segment(x=minA,xend=maxA,y=aloneSlope(minA),yend=aloneSlope(maxA),colour="#009E73",size=1.5)+
    geom_segment(x=minM,xend=maxM,y=mapleSlope(minM),yend=mapleSlope(maxM),colour="#E69F00",size=1.5)+theme_simple()+
  geom_segment(x=minG,xend=maxG,y=mustardSlope(minG),yend=mustardSlope(maxG),colour="#56B4E9",size=1.5)+theme_simple()+
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")")))+xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))+
  scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))

#dev.off()

#Visualization– The Detriment of pathogens and ferns

summary(lmer(GM_TotalLeaf_Area ~ Fern+
(1 | Family) + (1 | gh_bench/gh_col) ,data=dat2))
boundary (singular) fit: see ?isSingular
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: GM_TotalLeaf_Area ~ Fern + (1 | Family) + (1 | gh_bench/gh_col)
   Data: dat2

REML criterion at convergence: 9607.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.85397 -0.70508  0.04291  0.74619  3.11787 

Random effects:
 Groups          Name        Variance Std.Dev.
 gh_col:gh_bench (Intercept)    96624  310.8  
 Family          (Intercept)        0    0.0  
 gh_bench        (Intercept)  2947085 1716.7  
 Residual                    10094168 3177.1  
Number of obs: 507, groups:  gh_col:gh_bench, 28; Family, 23; gh_bench, 5

Fixed effects:
            Estimate Std. Error       df t value Pr(>|t|)    
(Intercept) 7896.911    807.136    4.071   9.784 0.000561 ***
Fern        -226.020     56.097  432.082  -4.029 6.61e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
     (Intr)
Fern -0.055
convergence code: 0
boundary (singular) fit: see ?isSingular
a<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=BlackPathDam))+theme_simple_multiCol()+
  geom_abline(intercept=8281.76,slope = -105.28,size=1.5)+
  xlab("Black Pathogen Damage")

b<-ggplot(dat2[dat2$WhiteFungDam<30,])+
  geom_point(aes(y=GM_TotalLeaf_Area,x=WhiteFungDam),colour="#999999")+theme_simple_multiCol()+
  theme(axis.title.x = element_text(color = "#999999", size = 16, face = "bold",margin=margin(3,0,3,0)),
      )+xlab("Powdery Mildew Damage")

c<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=ThripsDam),colour="#E69F00")+theme_simple_multiCol()+xlab("Thrips Damage")+
    theme(axis.title.x = element_text(color = "#E69F00", size = 16, face = "bold",margin=margin(3,0,3,0)))

d<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=Fern),colour="#009E73")+theme_simple_multiCol()+xlab("Fern Abundance")+
   geom_abline(intercept=8003.44,slope = -179.47,size=1.5,color="#009E73")+
  theme(axis.title.x = element_text(color = "#009E73", size = 16, face = "bold",margin=margin(3,0,3,0)))
   


plot<-plot_grid(a, b,ncol=2,rel_widths = c(1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
plot2<-plot_grid(d, c,ncol=2,rel_widths = c(1,1))
Removed 9 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
plot3<-plot_grid(a,b,c,d,ncol=2,rel_widths = c(1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 9 rows containing missing values (geom_point).
plot4<-plot_grid(a,b,c,ncol=1,rel_widths = c(1,1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
plot5<-plot_grid(a,b,c,ncol=3,rel_widths = c(1,1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
y.grob<-textGrob(bquote(bold("Shoot Area "(mm^2))),gp=gpar(fontface="bold",fontsize=20),rot=90)


#tiff("Selection_Figures/PathogenEffect.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot,left=y.grob)

#dev.off()

#tiff("Selection_Figures/PathogenEffect2.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot2,left=y.grob)

#dev.off()

#tiff("Selection_Figures/PathogenEffect3.tiff", units="in", width=14, height=10, res=300)
grid.arrange(plot3,left=y.grob)

#dev.off

grid.arrange(plot4,left=y.grob)



#tiff("Selection_Figures/PathogenEffect5.tiff", units="in", width=16, height=5, res=300)
grid.arrange(plot5,left=y.grob)

#dev.off

#tiff("Selection_Figures/FernEffect.tiff", units="in", width=8, height=6, res=300)
grid.arrange(d,left=y.grob)

#dev.off

#—————————————– #Part 2

#Influence of gluc and flav on defence.

hist(dat$Fern)
Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 
3: Unknown or uninitialised column: `WhiteFungLogis`. 
4: Unknown or uninitialised column: `WhiteFungLogis`. 
5: Unknown or uninitialised column: `WhiteFungLogis`. 
6: Unknown or uninitialised column: `WhiteFungLogis`. 
7: Unknown or uninitialised column: `WhiteFungLogis`. 
8: Unknown or uninitialised column: `WhiteFungLogis`. 
9: Unknown or uninitialised column: `WhiteFungLogis`. 
10: Unknown or uninitialised column: `WhiteFungLogis`. 

#Modelling : Negative binomial on Thrips Damage.

I think that a logistic regression is more appropriate for fungal abundance, because the count of fungal infection could be arbuitrary, especially when fungal patches were large or the leaf was completely covered.

#WhitePathDam – logistic regression

#Because of how zero inflated white pathogen damage is, i will use a logistic regression to model it.
dat2$WhiteFungLogis<-NA
dat2$WhiteFungLogis[dat2$WhiteFungDam==0]<-0
dat2$WhiteFungLogis[dat2$WhiteFungDam>0]<-1


#This is the biggest model that could converge. ... interaction with flavonoid could not.
fit_full_g<-glmer(WhiteFungLogis~treatment*gluc_Conc+flav_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$flav_Conc),])

fit.1<-update(fit_full_g,~.-flav_Conc)
anova(fit_full_g,fit.1) #Flavonoid Concentration is not significant. 


#Testing glucosinolate treatment interaction. 
fit.2<-glmer(WhiteFungLogis~treatment*gluc_Conc+(1|Family),family=binomial,data=dat2)
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2)
anova(fit.2,fit.3) #Glucosinolate:Treatment interaction is not significant.

#Testing glucosinolate involvment at all.
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
anova(fit.4,fit.3) #Glucosinolate Concentration is not a significant predictor

#Testing effect of treatment
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2)
fit.5<-glmer(WhiteFungLogis~1+(1|Family),family=binomial,data=dat2)
anova(fit.4,fit.5)
#treatment is significant......

summary(fit.4) #Garlic mustard in the maple treatment have less occurence of fungal colonization. 

plot(fit.4)

#Modelling: Negative Binomial – BlackPathDam

plot(resid(fit_2))
Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 

#Modelling: Negative Binomial- Fern abundance

anova(fit_2,fit_1)
Data: datFern
Models:
fit_2: Fern ~ 1 + (1 | Family/Tag) + (1 | gh_bench), zi=~0, disp=~1
fit_1: Fern ~ treatment + (1 | Family/Tag) + (1 | gh_bench), zi=~0, disp=~1
      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
fit_2  5 772.95 794.18 -381.47   762.95                           
fit_1  7 770.33 800.06 -378.17   756.33 6.6129      2    0.03665 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#Significant testing

#Visualizing – effect of treatment on proportion of fungal abundance.

#Summarizing for Display: generating frequency of white funal infection by treatment
plot<-dat2 %>% drop_na(WhiteFungLogis) %>% group_by(treatment) %>% summarize(PercWhitFung=sum(WhiteFungLogis)/length(WhiteFungLogis)*100)


#tiff("Defence_Figures/TreatMeanWhiteFung.tiff", units="in", width=8, height=5, res=300)
ggplot(plot)+
  geom_col(aes(x=treatment,y=PercWhitFung,fill=treatment))+theme_simple()+ylab("Fungal Abundance\n (% Infected)")+
  scale_y_continuous(breaks = seq(0,30,5))+
scale_x_discrete(name="",labels=c("Alone","Garlic Mustard","Maple"))+
  scale_fill_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  theme_simple_multiCol()+theme(axis.title.y =  element_text(color = "black", size = 16, face = "bold",margin=margin(3,20,3,0)))

#dev.off()

#Visualizing — the distribution of pathogens by treatment.

#I would need to account for the garlic mustard being psuedoreplicated iwth the fern data, because each gm in the gm treatment was grown with another gm in the same pot. 
ggplot(plot4)+
  geom_col(aes(x=treatment,y=Fern,fill=treatment))+theme_simple()+ylab("Average Fern Abundance (#Ferns/pot)")+xlab("Treatment")+theme(legend.position = "none")+
  scale_x_discrete(name="",labels=c("Alone","Garlic Mustard","Maple"))+
  scale_fill_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  theme_simple_multiCol()+theme(axis.title.y =  element_text(color = "black", size = 16, face = "bold",margin=margin(3,20,3,0)))
Error in ggplot(plot4) : object 'plot4' not found
In addition: Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 
3: Unknown or uninitialised column: `WhiteFungLogis`. 
4: Unknown or uninitialised column: `WhiteFungLogis`. 

#Visualizing - influence of flavonoids on thrips damage.


PoisSlope=function(x){
  y=exp(-2.3319*x+3.5249)
  return(y)
}


flavplot=seq(min(dat$flav_Conc,na.rm = T),max(dat$flav_Conc,na.rm=T),length.out = 687)

flavy<-PoisSlope(flavplot)

#tiff("Defence_Figures/FlavonoidThrips.tiff", units="in", width=10, height=6, res=300)
ggplot(dat[!is.na(dat$flav_Conc)&!is.na(dat$flav_Conc),])+
  geom_point(aes(y=ThripsDam,x=flav_Conc))+theme_simple()+
  geom_path(x=flavplot,y=flavy,size=1,colour="#999999")+
  
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30,35,40))+
  ylab("Thrips Damage")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

#Visualizing – effect of flavonoids on fern abundance.


PoisSlope=function(x,int){
  y=exp((-0.02353 -3.12475)*x+int)
  return(y)
}
exp(-0.6571)

flavplot=seq(min(dat$flav_Conc,na.rm = T),max(dat$flav_Conc,na.rm=T),length.out = 707)

flavyA<-PoisSlope(flavplot,-2.1683)
flavyM<-PoisSlope(flavplot,-1.60546-2.81450)
flavyGM<-PoisSlope(flavplot,-2.1683-0.2786)


#tiff("Defence_Figures/FlavonoidFern.tiff", units="in", width=10, height=6, res=300)
ggplot(dat)+
  geom_point(aes(y=Fern,x=flav_Conc,colour=treatment))+theme_simple()+
 # geom_path(x=flavplot,y=flavyA,size=1,colour="#009E73")
  #geom_path(x=flavplot,y=flavyGM,size=1,colour="#56B4E9")
  geom_path(x=flavplot,y=flavyM,size=1,colour="#E69F00")+
      scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30,35,40))+
  ylab("Fern Abundance")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

How can we know that healthy plants dont just exhibit more secondary compounds and not that those with more secondary compounds are healthier?

---
title: "Selective benefit of Glucosinolate and flavonoids"
output: html_notebook
---
#Part 1
The purpose of this notebook is to determine the selective benefit of glucosinolate and flavonoid compounds through plant competition by creating selection gradients. Selection gradients will involve final body mass as the proxy for fitness, but this will be replaced with fitness once the measurement is in. Concentration will be on the x-axis. This analysis will account for family and greenhouse location. 

#Part 2
The second purpose is to determine if glucosinolates and flavonoids influence suscpetibility to pathogens and if this susceptibility results in increased fitness




#Read in and prep data
```{r}
library(ggplot2)
library(lme4)
library(tidyr)
library(cowplot)
library(grid)
library(gridExtra)
library(dplyr)
source("GGPlot_Themes.R")

#read in data
rm(list=ls())
dat<-read.csv("DataSynthesis.csv")

#Remove maple controls data from the data set.
dat<-dat %>% filter(treatment!="mcnt") %>% droplevels()

#Assign family column
prefamily<-gsub("*.\\|","",dat$Tag)
dat$Family<-gsub("\\-.*","",prefamily)

#remove those with fertilizer treatment, and extra genotypes that are only in the alone treatment. 
dat<-dat[!grepl("i",dat$Sample,fixed=T),]

#Initializing columns to avoid constant error messages. 
dat$WhiteFungLogis<-NA
dat$BlackFungLogis<-NA
```


#Generating data frames with summarized leaf data (dat2) and summarized genotype data (dat3)
```{r}
#Summarizing: Taking the mean value of leaves. This tibble contains data at the level of the plant means. 
dat2<-dat %>% group_by(Tag) %>% summarize(ChlorA=mean(ChlorA),ChlorB=mean(ChlorB),gluc_Conc=mean(gluc_Conc),flav_Conc=mean(flav_Conc),Family=first(Family),treatment=first(treatment),gh_row=first(gh_row),gh_bench=first(gh_bench),GM_TotalLeaf_Area=first(GM_TotalLeaf_Area),comp_number=first(comp_number),ThripsDam=mean(ThripsDam),WhiteFungDam=mean(WhiteFungDam),BlackPathDam=mean(BlackPathDam),Fern=mean(Fern),gh_col=first(gh_col))
dat

#All of these genotypes died (15 Genotypes).(We are simply missing a final measurement for e|JBCHY1-1-50|Q|240) That is only 3% Mortality. 
dead<-dat2[is.na(dat2$GM_TotalLeaf_Area),]

#Removing those with dead competitors from the garlic mustard treatment. ("e|JBCHY1-1-50|Q|240") did not die, we are simply missing the final measurement for it.

dead_competitors<-dead %>% filter(treatment=="gm",Tag!="e|JBCHY1-1-50|Q|240") %>% select(comp_number)

#Removing those with dead competitors from the analysis. 
dat2<-dat2 %>% filter(!comp_number %in% dead_competitors$comp_number)


##Summarizing: Taking the mean value of plants. This tibble contains data at the level of the family means within each treatment. 
dat3<-dat2 %>% drop_na(GM_TotalLeaf_Area) %>% group_by(Family,treatment)  %>% summarize_if(is.numeric,mean)

```


#Searching for a fitness trade-off between the alone and interspecific competition treatment. 
```{r}
source("GGPlot_Themes.R")

#Calculating family means within treatment. 
TradeOff<-dat3 %>% filter(treatment=="a") %>% drop_na(GM_TotalLeaf_Area) %>%  select(LeafSizeAlone=GM_TotalLeaf_Area,Family,gluc_ConcAlone=gluc_Conc) %>% right_join(dat3 %>% filter(treatment=="m") %>%  select(LeafSizeMaple=GM_TotalLeaf_Area,Family,gluc_ConcMaple=gluc_Conc),by="Family")

#Calculating standard error of each family
StdErr<-dat2 %>% select(Family,treatment,GM_TotalLeaf_Area) %>% group_by(Family,treatment) %>% drop_na(GM_TotalLeaf_Area) %>%  summarize(StdErr=sd(GM_TotalLeaf_Area)/sqrt(length(GM_TotalLeaf_Area)),size=length(GM_TotalLeaf_Area))

#Shifting the data to be in long form. 
StdErr2<-StdErr %>% filter(treatment=="a") %>% select(StdErrAlone=StdErr,Family) %>% right_join(StdErr %>% filter(treatment=="m") %>% select(StdErrMaple=StdErr,Family),by="Family")

TradeOff2<-StdErr2 %>% left_join(TradeOff)


#tiff("Selection_Figures/TradeOff.tiff", units="in", width=10, height=6, res=300)

ggplot(TradeOff2,aes(y=LeafSizeAlone,x=LeafSizeMaple))+
  geom_point()+
  geom_linerange(aes(ymin = LeafSizeAlone - StdErrAlone, 
                    ymax = LeafSizeAlone + StdErrAlone))+
  geom_errorbarh(aes(xmin = LeafSizeMaple - StdErrMaple,
                    xmax = LeafSizeMaple + StdErrMaple))+
theme_simple()+
  xlab("Performance with Maple")+
  ylab("Performance Alone")
#dev.off()
summary(lm(LeafSizeAlone~LeafSizeMaple,data=TradeOff2))

```





#Visualizing genetic variation and greenhouse variation, which will be controlled for. 
```{r}
#GH Bench
ggplot(dat2)+
  geom_point(aes(y=gluc_Conc,x=gh_bench,colour=as.factor(gh_bench)))

#GH Col
ggplot(dat2)+
  geom_point(aes(y=gluc_Conc,x=gh_col,colour=as.factor(gh_bench)))

#Investigating genetic differences by treatment
#gluc_Conc
boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="a",])
boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="m",])
boxplot(gluc_Conc~Family,data=dat2[dat2$treatment=="gm",])
#bodymass
boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="a",])
boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="m",])
boxplot(GM_TotalLeaf_Area~Family,data=dat2[dat2$treatment=="gm",])
```



#Modelling: What influences performance (total leaf area)
```{r}
#This is the full model.I expect that the influence of gluc conc and flav conc could vary between treatments because of allelopathy, therefore, i include interactions with these variables. However, i have no reason to think that pathogens or fern abundance could influence fitness differently in different treatments, therefore, no interactions are included.

#First lets ensure we are using the correct random effects.


fitfull<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|Family)+(1|gh_bench/gh_row), data=dat2)

fitfull2<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|Family)+(1|gh_bench/gh_col), data=dat2)

fitfull3<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|Family)+(1|gh_bench), data=dat2)

fitfull4<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|Family), data=dat2)

fitfull5<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|gh_bench), data=dat2)

fitfull6<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|gh_bench/gh_col), data=dat2)

anova(fitfull0,fitfull2,fitfull3,fitfull4,fitfull5,fitfull6) #Best model is fitfull6 and fitfull2. I will compare these two directly
anova(fitfull6,fitfull2) #Family is not a significant predictor and will be excluded, but greenhouse location is and will be included. 

#Modelling fixed effects. -----------

#Full Model
fitfull3<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc*flav_Conc+BlackPathDam+WhiteFungDam+ThripsDam+Fern+(1|gh_bench/gh_col), data=dat2)

#Removing three way interaction
fit.1<-update(fitfull3, ~.-treatment:gluc_Conc:flav_Conc)
anova(fitfull3,fit.1) #Good to remove

#Removing two way interaction gluc:flav
fit.2<-update(fit.1,~.-gluc_Conc:flav_Conc)
anova(fit.2,fit.1) #Good to remove. 

#Removing treatment:flavonoid interactions
fit.3<-update(fit.2,~.-treatment:flav_Conc)
anova(fit.2,fit.3) #Good to remove. 


#Flavonoid Concentration has missing samples. Therefore, I fit two models, one including flav_Conc and one that does not; however, both use the same limited dataset.
#No flav_Conc
fit.4<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam + WhiteFungDam +  
    ThripsDam + Fern +   treatment:gluc_Conc + (1 | gh_bench/gh_col)  ,data=dat2[!is.na(dat2$flav_Conc),])
#Yes flav_Conc
fit.3<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam + WhiteFungDam +  
    ThripsDam + Fern +   treatment:gluc_Conc+flav_Conc + (1 | gh_bench/gh_col)  ,data=dat2[!is.na(dat2$flav_Conc),])

anova(fit.3,fit.4) #Flav_Conc is a significant predictor. 

#Now that I know flav_conc is significant, I will conduct the rest of the model simplification using the whole data set (i.e. flav_Conc excluded --fit4) 



fit.nf<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam + WhiteFungDam +  
    ThripsDam + Fern +   treatment:gluc_Conc+ (1 | gh_bench/gh_col)  ,data=dat2)

fit.0.nf<-update(fit.nf,~.-treatment:gluc_Conc)
anova(fit.0.nf,fit.nf)  #The interaction is right on the verge of significance, I will keep it included. 

#Creating a subsetted dataframe for the black pathogen damage. 
fit.nf.bp<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam + WhiteFungDam +  
    ThripsDam + Fern +   treatment:gluc_Conc+ (1 | gh_bench/gh_col)  ,data=dat2[!is.na(dat2$BlackPathDam),])

fit.1.nf<-update(fit.nf.bp,~.-BlackPathDam)
anova(fit.nf.bp,fit.1.nf) #Black pathogen damage is a very significant predictor. Did not remove.

fit.2.nf<-update(fit.nf,~.-WhiteFungDam)
anova(fit.nf,fit.2.nf)  #White Fungal Damage was not significant, however, both the AIC and BIC was lower with it included, suggesting it may be an important variable. 


#Refitting a model for thrips dam 
fit.nf<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam +  
    ThripsDam + Fern +   treatment:gluc_Conc+ (1 | gh_bench/gh_col)  ,data=dat2[!is.na(dat2$ThripsDam),])

fit.3.nf<-update(fit.nf,~.-ThripsDam)
anova(fit.nf,fit.3.nf)  #Thrips Damage was also not a significant predictor, however, both the AIC and BIC was lower with it included, suggesting it may be an important predictor. 


fit.nf<-lmer(GM_TotalLeaf_Area ~ treatment + gluc_Conc + BlackPathDam +  
    ThripsDam +  Fern+  treatment:gluc_Conc+ (1 | gh_bench/gh_col)  ,data=dat2)

fit.4.nf<-update(fit.nf,~.-Fern)
anova(fit.nf,fit.4.nf)  #Fern abundance is significnat predictor performance.


library(lmerTest)
#The best model is therefore: 

#With limited dataset containing flavonoid data
summary(lmer(GM_TotalLeaf_Area ~ treatment*gluc_Conc+Fern+BlackPathDam +flav_Conc + (1 | gh_bench/gh_col) ,data=dat2))

#with whole data set lacking flavonoid data
summary(lmer(GM_TotalLeaf_Area ~ treatment*gluc_Conc+Fern+BlackPathDam  + (1 | gh_bench/gh_col) ,data=dat2))

#The negative slope associated with the alone treamtment is no longer significant, suggesting that there is only a postive slope of glucosinolates in the maple treatment. However, the issue with this analysis is that is does not account for the competitive strength of the maple competitor, which may be correlated with glucosinolate concentration. 

#Models with Thrips damage and white pathogen damage exhibited lower AIC and BIC values than those without, however, the likelyhood ratio test was not significant. Can I use poisson data as a predictor in this model? Should i log transform it? I am not sure. 



```


#Model Diagnostics. 
```{r}
#GlucConcInteraction
fit.Whole<-lmer(GM_TotalLeaf_Area ~ treatment*gluc_Conc + BlackPathDam+Fern+flav_Conc + (1 | gh_bench/gh_col) ,data=dat2)

confint(fit.Whole)

coef(fit.Whole)
#no heteroscedasticity
plot(fit.Whole)
#fairly normal. 
qqnorm(resid(fit.Whole))

#The model appears to be a good fit.
```


#Visualization -- The conditional benefit of glucosinolates (allelopathy)
```{r}

aloneSlope<-function(x){
  y=-3966.60*x+ 13751.96
  return(y)
}
mapleSlope<-function(x){
  y=(-4254.2+8417.95)*x+13751.96 -9243.71
  return(y)
}

mustardSlope<-function(x){
  y=+13751.96-8376.67#Non significant slope
  return(y)
}

minM<-min(dat2$gluc_Conc[dat2$treatment=="m"],na.rm = T)
maxM<-max(dat2$gluc_Conc[dat2$treatment=="m"],na.rm = T)

minA<-min(dat2$gluc_Conc[dat2$treatment=="a"],na.rm = T)
maxA<-max(dat2$gluc_Conc[dat2$treatment=="a"],na.rm = T)

minG<-min(dat2$gluc_Conc[dat2$treatment=="gm"],na.rm = T)
maxG<-max(dat2$gluc_Conc[dat2$treatment=="gm"],na.rm = T)

#tiff("Selection_Figures/Gluc_Benefit.tiff", units="in", width=10, height=6, res=300)
library(ggplot2)
ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=gluc_Conc,colour=treatment),size=2)+
  geom_segment(x=minA,xend=maxA,y=aloneSlope(minA),yend=aloneSlope(maxA),colour="#009E73",size=1.5)+
    geom_segment(x=minM,xend=maxM,y=mapleSlope(minM),yend=mapleSlope(maxM),colour="#E69F00",size=1.5)+
  geom_segment(x=minG,xend=maxG,y=mustardSlope(minG),yend=mustardSlope(maxG),colour="#56B4E9",size=1.5)+theme_simple()+
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")")))+xlab(bquote(bold("[Total Glucosinolate] " (mg/ml))))+
  scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))
#dev.off()

minA


```

#Visualizing the effect of flavonoid on fitness.
```{r}
summary(lmer(GM_TotalLeaf_Area~treatment+flav_Conc+BlackPathDam+Fern+(1|Family)+(1|gh_bench/gh_col), data=dat2))

aloneSlope<-function(x){
  y=2024.62 *x+  8035.30
  return(y)
}
mapleSlope<-function(x){
  y=(2024.62 )*x+ 8035.30 -1067.02
  return(y)
}

mustardSlope<-function(x){
  y=2024.62 *x+ 8035.30-3746.91#Non significant slope
  return(y)
}

minM<-min(dat2$flav_Conc[dat2$treatment=="m"],na.rm = T)
maxM<-max(dat2$flav_Conc[dat2$treatment=="m"],na.rm = T)

minA<-min(dat2$flav_Conc[dat2$treatment=="a"],na.rm = T)
maxA<-max(dat2$flav_Conc[dat2$treatment=="a"],na.rm = T)

minG<-min(dat2$flav_Conc[dat2$treatment=="gm"],na.rm = T)
maxG<-max(dat2$flav_Conc[dat2$treatment=="gm"],na.rm = T)


#tiff("Selection_Figures/Flav_Benefit.tiff", units="in", width=10, height=6, res=300)
ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=flav_Conc,colour=treatment),size=2)+
  geom_segment(x=minA,xend=maxA,y=aloneSlope(minA),yend=aloneSlope(maxA),colour="#009E73",size=1.5)+
    geom_segment(x=minM,xend=maxM,y=mapleSlope(minM),yend=mapleSlope(maxM),colour="#E69F00",size=1.5)+theme_simple()+
  geom_segment(x=minG,xend=maxG,y=mustardSlope(minG),yend=mustardSlope(maxG),colour="#56B4E9",size=1.5)+theme_simple()+
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")")))+xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))+
  scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))
#dev.off()
```

#Visualization-- The Detriment of pathogens and ferns
```{r}
summary(lmer(GM_TotalLeaf_Area ~ Fern+
(1 | Family) + (1 | gh_bench/gh_col) ,data=dat2))

a<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=BlackPathDam))+theme_simple_multiCol()+
  geom_abline(intercept=8281.76,slope = -105.28,size=1.5)+
  xlab("Black Pathogen Damage")

b<-ggplot(dat2[dat2$WhiteFungDam<30,])+
  geom_point(aes(y=GM_TotalLeaf_Area,x=WhiteFungDam),colour="#999999")+theme_simple_multiCol()+
  theme(axis.title.x = element_text(color = "#999999", size = 16, face = "bold",margin=margin(3,0,3,0)),
      )+xlab("Powdery Mildew Damage")

c<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=ThripsDam),colour="#E69F00")+theme_simple_multiCol()+xlab("Thrips Damage")+
    theme(axis.title.x = element_text(color = "#E69F00", size = 16, face = "bold",margin=margin(3,0,3,0)))

d<-ggplot(dat2)+
  geom_point(aes(y=GM_TotalLeaf_Area,x=Fern),colour="#009E73")+theme_simple_multiCol()+xlab("Fern Abundance")+
   geom_abline(intercept=8003.44,slope = -179.47,size=1.5,color="#009E73")+
  theme(axis.title.x = element_text(color = "#009E73", size = 16, face = "bold",margin=margin(3,0,3,0)))
   


plot<-plot_grid(a, b,ncol=2,rel_widths = c(1,1))

plot2<-plot_grid(d, c,ncol=2,rel_widths = c(1,1))

plot3<-plot_grid(a,b,c,d,ncol=2,rel_widths = c(1,1))

plot4<-plot_grid(a,b,c,ncol=1,rel_widths = c(1,1,1))
plot5<-plot_grid(a,b,c,ncol=3,rel_widths = c(1,1,1))


y.grob<-textGrob(bquote(bold("Shoot Area "(mm^2))),gp=gpar(fontface="bold",fontsize=20),rot=90)


#tiff("Selection_Figures/PathogenEffect.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot,left=y.grob)
#dev.off()

#tiff("Selection_Figures/PathogenEffect2.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot2,left=y.grob)
#dev.off()

#tiff("Selection_Figures/PathogenEffect3.tiff", units="in", width=14, height=10, res=300)
grid.arrange(plot3,left=y.grob)
#dev.off

grid.arrange(plot4,left=y.grob)


#tiff("Selection_Figures/PathogenEffect5.tiff", units="in", width=16, height=5, res=300)
grid.arrange(plot5,left=y.grob)
#dev.off

#tiff("Selection_Figures/FernEffect.tiff", units="in", width=8, height=6, res=300)
grid.arrange(d,left=y.grob)
#dev.off

```



#-----------------------------------------
#Part 2





#Influence of gluc and flav on defence. 
```{r}

hist(dat$Fern)
hist(dat$ThripsDam)
hist(dat$WhiteFungDam)
hist(dat$BlackPathDam)

dat$BlackPathDam

#This data is very zero inflated and a poisson model will be too overdispersed. A Negative binomial distribution will be used. 
```



#Modelling : Negative binomial on Thrips Damage. 
```{r}
library("glmmTMB")
fit_full<-glmmTMB(ThripsDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)

fit_1<-update(fit_full,~.-treatment)
anova(fit_full,fit_1) #treatment is unimportant. 

fit_2<-update(fit_1,~.+ gluc_Conc:flav_Conc)

anova(fit_1,fit_2)#interaction unimportant. 


fit_1<-glmmTMB(ThripsDam~gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])

fit_2<-update(fit_1,~.- flav_Conc)
anova(fit_1,fit_2)#
#Flav_Conc is highly important. 



fit_2<-update(fit_1,~.- gluc_Conc)
anova(fit_1,fit_2)# gluc conc does not appear to be significant. I will try using the whole gluc conc data set. 

fit_1<-glmmTMB(ThripsDam~gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])

fit_2<-update(fit_1,~.- gluc_Conc)
anova(fit_1,fit_2)# gluc conc is highly significant however when flavonoids are excluded. Therefore secondary compounds, both glucosinolates and flavonoids predicts reduction in thrips damage. 

#Best model includes both, however, putting both in the same model will distribute the effect amoungst both of the terms. This might not be appropriate, but i will check for this. 

fit_g<-glmmTMB(ThripsDam~gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
fit_f<-glmmTMB(ThripsDam~flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
fit_full<-glmmTMB(ThripsDam~gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)

summary(fit_f)$coef
summary(fit_g)$coef
summary(fit_full)$coef

#As expected, the coefficients are distributed evenly amoungst gluc and flav concentrations, the conclusion is that both reduce thrips abundance and it is not possible to tell which does the most, however, the effect is more significant for flavonoids. 

plot(resid(fit_f))
plot(resid(fit_f)^2)
#The model appears to be a very good fit. 

```


I think that a logistic regression is more appropriate for fungal abundance, because the count of fungal infection could be arbuitrary, especially when fungal patches were large or the leaf was completely covered. 

#WhitePathDam -- logistic regression 
```{r}
#Because of how zero inflated white pathogen damage is, i will use a logistic regression to model it.
dat2$WhiteFungLogis<-NA
dat2$WhiteFungLogis[dat2$WhiteFungDam==0]<-0
dat2$WhiteFungLogis[dat2$WhiteFungDam>0]<-1


#This is the biggest model that could converge. ... interaction with flavonoid could not.
fit_full_g<-glmer(WhiteFungLogis~treatment*gluc_Conc+flav_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$flav_Conc),])

fit.1<-update(fit_full_g,~.-flav_Conc)
anova(fit_full_g,fit.1) #Flavonoid Concentration is not significant. 


#Testing glucosinolate treatment interaction. 
fit.2<-glmer(WhiteFungLogis~treatment*gluc_Conc+(1|Family),family=binomial,data=dat2)
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2)
anova(fit.2,fit.3) #Glucosinolate:Treatment interaction is not significant.

#Testing glucosinolate involvment at all.
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
anova(fit.4,fit.3) #Glucosinolate Concentration is not a significant predictor

#Testing effect of treatment
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2)
fit.5<-glmer(WhiteFungLogis~1+(1|Family),family=binomial,data=dat2)
anova(fit.4,fit.5)
#treatment is significant......

summary(fit.4) #Garlic mustard in the maple treatment have less occurence of fungal colonization. 

plot(fit.4)
```



#Modelling: Negative Binomial -- BlackPathDam 
```{r}
dat$BlackPathDam<-ceiling(dat$BlackPathDam)
fit_full<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)
fit_full_0.1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)

anova(fit_full,fit_full_0.1)#gh_Bench was not an important random effect in this model. 

fit_full_0.1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
fit_1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
anova(fit_full_0.1,fit_1) #Flav conc is a very important predictor of black pathogen damage. 

fit_1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
fit_2<-glmmTMB(BlackPathDam~treatment+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
anova(fit_1,fit_2) #Glucosinolates are not a significant predictor at all. 


fit_2<-glmmTMB(BlackPathDam~treatment+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)
fit_3<-glmmTMB(BlackPathDam~flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)
anova(fit_3,fit_2) #Treatment is a significant predictor of black pathogen damage. 


#Therefore, the best model is one with flavonoids and treatment. 

summary(fit_2)

plot(resid(fit_2)) #Model fits well.

```


#Modelling: Negative Binomial- Fern abundance
```{r}
#Because there are two garlic mustard individuals per pot, and the unit we are looking at is only replicable at the pot level (fern abundance in a pot) duplicates within the same pot in the garlic mustard treatment need to be removed and averaged to remove pseudoreplication. 

#What i will do is make the competition number the tag for those in the GM treatment. This will have the effect of maintaining the leaf variation, but averaging it over each pot. 

# I will need to use dat2, summarized at the individual level, because we have a single observation at the pot level, not at the leaf level. 
datFern<-dat2
datFern$Tag<-as.character(datFern$Tag)
for(i in 1:length(datFern$Tag)){
  if(!is.na(datFern$comp_number[i])){
    datFern$Tag[i]<-datFern$comp_number[i]
  }
}
datFern
#I am excluding flavonoid concentration because there were no detectable flavonoids in the root samples. 
fit_full<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern)
fit_full_0.1<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family/Tag),family=nbinom2,data=datFern)

anova(fit_full,fit_full_0.1)#Bench is an extremenly important predictor. 

fit_full<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
fit_1<-glmmTMB(Fern~treatment+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
anova(fit_full,fit_1)#Glucosinolates are not a significant predictor of fern abundance (although the AIC is about the same)

fit_full<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
fit_1<-glmmTMB(Fern~treatment+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
anova(fit_full,fit_1)


fit_1<-glmmTMB(Fern~treatment+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern)

fit_2<-glmmTMB(Fern~1+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=datFern)
anova(fit_2,fit_1)
#Fern abundance is predicted by treatment, however. 



```





#Significant testing 





#Visualizing -- effect of treatment on proportion of fungal abundance. 
```{r}
#Summarizing for Display: generating frequency of white funal infection by treatment
plot<-dat2 %>% drop_na(WhiteFungLogis) %>% group_by(treatment) %>% summarize(PercWhitFung=sum(WhiteFungLogis)/length(WhiteFungLogis)*100)


#tiff("Defence_Figures/TreatMeanWhiteFung.tiff", units="in", width=8, height=5, res=300)
ggplot(plot)+
  geom_col(aes(x=treatment,y=PercWhitFung,fill=treatment))+theme_simple()+ylab("Fungal Abundance\n (% Infected)")+
  scale_y_continuous(breaks = seq(0,30,5))+
scale_x_discrete(name="",labels=c("Alone","Garlic Mustard","Maple"))+
  scale_fill_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  theme_simple_multiCol()+theme(axis.title.y =  element_text(color = "black", size = 16, face = "bold",margin=margin(3,20,3,0)))
#dev.off()

```






#Visualizing --- the distribution of pathogens by treatment. 
```{r}

plot2<-dat2 %>% drop_na(BlackPathDam) %>% group_by(treatment) %>% summarize(BlackPathAve=mean(BlackPathDam,na.rm=T))

plot3<-dat2 %>% drop_na(ThripsDam) %>% group_by(treatment) %>% summarize(ThripsDam=mean(ThripsDam,na.rm=T))

plot4<-dat2 %>% drop_na(Fern) %>% group_by(treatment) %>% summarize(Fern=mean(Fern,na.rm=T))


#Black Pathogen abundance by treamtent
ggplot(plot2)+
  geom_col(aes(x=treatment,y=BlackPathAve,fill=treatment))+theme_simple()+ylab("Fungal Abundance\n (% colonized)")+xlab("Treatment")+theme(legend.position = "none")

#Thrips abundance by treatment. 
ggplot(plot3)+
  geom_col(aes(x=treatment,y=ThripsDam,fill=treatment))+theme_simple()+ylab("Fungal Abundance\n (% colonized)")+xlab("Treatment")+theme(legend.position = "none")

#I would need to account for the garlic mustard being psuedoreplicated iwth the fern data, because each gm in the gm treatment was grown with another gm in the same pot. 
ggplot(plot4)+
  geom_col(aes(x=treatment,y=Fern,fill=treatment))+theme_simple()+ylab("Average Fern Abundance (#Ferns/pot)")+xlab("Treatment")+theme(legend.position = "none")+
  scale_x_discrete(name="",labels=c("Alone","Garlic Mustard","Maple"))+
  scale_fill_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  theme_simple_multiCol()+theme(axis.title.y =  element_text(color = "black", size = 16, face = "bold",margin=margin(3,20,3,0)))
#dev.off()



```












#Visualizing - influence of flavonoids on thrips damage. 
```{r}

PoisSlope=function(x){
  y=exp(-2.3319*x+3.5249)
  return(y)
}


flavplot=seq(min(dat$flav_Conc,na.rm = T),max(dat$flav_Conc,na.rm=T),length.out = 687)

flavy<-PoisSlope(flavplot)

#tiff("Defence_Figures/FlavonoidThrips.tiff", units="in", width=10, height=6, res=300)
ggplot(dat[!is.na(dat$flav_Conc)&!is.na(dat$flav_Conc),])+
  geom_point(aes(y=ThripsDam,x=flav_Conc))+theme_simple()+
  geom_path(x=flavplot,y=flavy,size=1,colour="#999999")+
  
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30,35,40))+
  ylab("Thrips Damage")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

```




#Visualizing -- effect of flavonoids on fern abundance. 
```{r}

PoisSlope=function(x,int){
  y=exp((-0.02353 -3.12475)*x+int)
  return(y)
}
exp(-0.6571)

flavplot=seq(min(dat$flav_Conc,na.rm = T),max(dat$flav_Conc,na.rm=T),length.out = 707)

flavyA<-PoisSlope(flavplot,-2.1683)
flavyM<-PoisSlope(flavplot,-1.60546-2.81450)
flavyGM<-PoisSlope(flavplot,-2.1683-0.2786)


#tiff("Defence_Figures/FlavonoidFern.tiff", units="in", width=10, height=6, res=300)
ggplot(dat)+
  geom_point(aes(y=Fern,x=flav_Conc,colour=treatment))+theme_simple()+
 # geom_path(x=flavplot,y=flavyA,size=1,colour="#009E73")
  #geom_path(x=flavplot,y=flavyGM,size=1,colour="#56B4E9")
  geom_path(x=flavplot,y=flavyM,size=1,colour="#E69F00")+
      scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30,35,40))+
  ylab("Fern Abundance")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

```

How can we know that healthy plants dont just exhibit more secondary compounds and not that those with more secondary compounds are healthier?





